######################
#  Replication code for 'Mediating the Electoral Connection', forthcoming in the JOP
#  John Henderson and John Brooks
#  12/7/2015    
######################    

# prelimMain.R
#  :: loads functions and data, and performs some basic data preparation tasks 
                         
# Install AER only when it cannot be found in any library on the search path.
#  requireNamespace() returns FALSE (instead of erroring) when the package is
#  missing. Counting matching rows of installed.packages() and testing '!= 1'
#  also fired when AER was installed in more than one library, forcing a
#  needless reinstall.
if(!requireNamespace('AER', quietly=TRUE)){
	install.packages('AER')
}

library(AER)  
# is.even: elementwise test for divisibility by 2 (TRUE where x %% 2 equals 0; NA stays NA)
is.even <- function(x) {
	remainder <- x %% 2
	remainder == 0
}
source('coeftest.cluster.R')      
            
##############
# <fes.type>           
#  controls the fixed effects and cluster units used             
#   1. decade x district
#   2. decade x icpsr
#   3. icpsr
#  NOTE: only fes.type = 2 and 3 are called, since decade x district has considerable error

# default to member (icpsr) fixed effects unless the caller has already set fes.type
if(!any(objects()=='fes.type')){
	fes.type <- 3
}
             
##############  
# <non.missings>
#  controls certain parameters regarding prior outcomes and the use of redistricting 
#   0. original data -- no redistricting years, including no prior outcomes for redistricting cycles
#   4. use icpsr link to measure prior election data, w/ redistricting years included

# default to option 4 (redistricting years included) unless the caller has already set non.missings
if(!any(objects()=='non.missings')){
	non.missings <- 4
}

# restore the saved workspace objects
#  (presumably supplies rain_data and the covs_* covariate sets used below -- confirm against the .Rdata file)
load('FinalRainData.Rdata')

# working covariate set: start from the imputed covariates w/o imputed outcomes
covs <- covs_some
               
#############################################################
# reset outcomes to reflect appropriate priors 
       
# non.missings == 0 :: original data w/o redistricting years
#  Copies current and prior outcomes straight from rain_data and appends them to covs.
#  NOTE: the order in which columns are appended to covs matters -- covs[,29:36] is
#  selected by position further down when covs_none is built.
if(non.missings==0){       
	
	# election outcomes	
	#  (current and prior shares are both read as stored in rain_data)
	dem_shr=rain_data$dem_shr
	rep_shr=rain_data$rep_shr	   
	dem_prv=rain_data$dem_prv
	rep_prv=rain_data$rep_prv
		
	# dose :: dem_shr minus rep_shr, for the current and the prior election
	dose=dem_shr-rep_shr
	dose_prv=dem_prv-rep_prv
  
	# roll call outcomes
	vote=rain_data$y_inc 
	vote_prv=rain_data$y_prv

	# attach outcomes to the working covariate set
    covs$dose=dose
    covs$vote=vote	
    covs$dose_prv=dose_prv
    covs$vote_prv=vote_prv  
	
    covs$turnout=rain_data$turnout          
    covs$turnout_prv=rain_data$turnout_prv

	covs$extremist=rain_data$extremist 
	covs$inc_win=rain_data$inc_win
	covs$inc_win_prv=rain_data$inc_win_prv                                

 	#lagged rain and next rain updates
	#  -- using icpsr for previous baseline  

	covs$rain_elec=rain_data$rain_elec
	
	# previous-period rain :: rain columns re-indexed by rain_data$ndex
	#  (presumably the row index of each observation's previous record -- TODO confirm)
	covs$rain_elec_prev=rain_data$rain_elec[rain_data$ndex]	
	covs$rain_day_prev=rain_data$rain_day[rain_data$ndex]
	covs$rain_weekend_prev=rain_data$rain_weekend[rain_data$ndex]  
	                                                            
	# the stored *_prev rain columns, re-indexed by ndex once more
	covs$rain_day_prev_prv=rain_data$rain_day_prev[rain_data$ndex]	
	covs$rain_elec_prev_prv=rain_data$rain_elec_prev[rain_data$ndex]
	covs$rain_weekend_prev_prv=rain_data$rain_weekend_prev[rain_data$ndex]

	# next-period rain :: rain columns re-indexed by rain_data$nedex
	#  (presumably the row index of each observation's next record -- TODO confirm)
	covs$rain_elec_next=rain_data$rain_elec[rain_data$nedex]
	covs$rain_day_next=rain_data$rain_day[rain_data$nedex]
	covs$rain_weekend_next=rain_data$rain_weekend[rain_data$nedex]   
		
}    

# non.missings == 4 :: current data using redistricting years + prior outcomes as controls
#  Prior outcomes are not read from stored *_prv columns; they are built by re-indexing
#  the current-period vectors with rain_data$prior_obs.
#  NOTE: the order in which columns are appended to covs matters -- covs[,29:36] is
#  selected by position further down when covs_none is built.
if(non.missings==4){  
	
	dem_shr=rain_data$dem_shr
	rep_shr=rain_data$rep_shr
                 	
	# prior shares :: current shares looked up at each row's prior_obs index
	dem_prv=dem_shr[rain_data$prior_obs]
	rep_prv=rep_shr[rain_data$prior_obs]

	# dose :: dem_shr minus rep_shr, for the current and the prior observation
	dose=dem_shr-rep_shr
	dose_prv=dem_prv-rep_prv
  	
	vote=rain_data$y_shr	
	vote_prv=vote[rain_data$prior_obs]
	
	# where d_inc==1 & d_win==0 (resp. r_inc==1 & r_win==0) the current vote is replaced
	# by the prior-observation vote; vote_prv itself was taken before this overwrite
	#  (presumably these rows are incumbents who lost -- TODO confirm flag definitions)
	vote[which(rain_data$d_inc==1 & rain_data$d_win==0)]=vote_prv[which(rain_data$d_inc==1 & rain_data$d_win==0)]
	vote[which(rain_data$r_inc==1 & rain_data$r_win==0)]=vote_prv[which(rain_data$r_inc==1 & rain_data$r_win==0)]	
		
	# blank out dose and dose_prv wherever the sign of dose disagrees with the win flag;
	# the four checks run in sequence on the progressively NA-masked dose (which() drops NA rows),
	# and ix is left holding the indices from the last check
    ix=which(rain_data$d_inc==1 & rain_data$d_win==0 & dose>0)
	dose[ix]=NA
	dose_prv[ix]=NA                                           
	ix=which(rain_data$d_inc==1 & rain_data$d_win==1 & dose<0)
	dose[ix]=NA      
	dose_prv[ix]=NA                                           
	ix=which(rain_data$r_inc==1 & rain_data$r_win==0 & dose<0)
	dose[ix]=NA     
	dose_prv[ix]=NA                                           
	ix=which(rain_data$r_inc==1 & rain_data$r_win==1 & dose>0)
	dose[ix]=NA   
	dose_prv[ix]=NA  
	
	# attach outcomes to the working covariate set
	covs$dose=dose
    covs$vote=vote	
    covs$dose_prv=dose_prv
    covs$vote_prv=vote_prv  
              
    covs$turnout=rain_data$turnout          
    covs$turnout_prv=rain_data$turnout_prv

	covs$extremist=rain_data$extremist 
	covs$inc_win=rain_data$inc_win
	covs$inc_win_prv=rain_data$inc_win_prv                               

    # lagged rain and next rain updates 
	#  -- using icpsr for previous baseline 
	covs$rain_elec=rain_data$rain_elec
	
	# previous-period rain :: rain columns re-indexed by prior_obs
	covs$rain_elec_prev=rain_data$rain_elec[rain_data$prior_obs]	
	covs$rain_day_prev=rain_data$rain_day[rain_data$prior_obs]
	covs$rain_weekend_prev=rain_data$rain_weekend[rain_data$prior_obs]  
	                                                            
	# the stored *_prev rain columns, re-indexed by prior_obs once more
	covs$rain_day_prev_prv=rain_data$rain_day_prev[rain_data$prior_obs]	
	covs$rain_elec_prev_prv=rain_data$rain_elec_prev[rain_data$prior_obs]
	covs$rain_weekend_prev_prv=rain_data$rain_weekend_prev[rain_data$prior_obs]

	# next-period rain :: rain columns re-indexed by next_obs
	covs$rain_elec_next=rain_data$rain_elec[rain_data$next_obs]
	covs$rain_day_next=rain_data$rain_day[rain_data$next_obs]
	covs$rain_weekend_next=rain_data$rain_weekend[rain_data$next_obs]
	            	                         	                  	
}
        
# assign fixed effects variable
#  fe_id_num is the numeric level code (1..K) of the grouping variable selected by fes.type:
#   1. decade x district   2. decade x icpsr   3. icpsr
#  as.numeric() on a factor already returns the level codes, so no character round trip is needed.
#  An unsupported fes.type now stops here; previously fe_id_num was left undefined
#  (or a stale value from the session was silently reused).
if(fes.type==1){
	fe_id_num=as.numeric(as.factor(rain_data$decade_district))
} else if(fes.type==2){
	fe_id_num=as.numeric(as.factor(rain_data$decade_icpsr))
} else if(fes.type==3){
	fe_id_num=as.numeric(as.factor(rain_data$icpsr))
} else {
	stop('fes.type must be 1, 2 or 3; got: ', fes.type)
}
     
# covariate matrices        
# append columns 29-36 of covs to covs_none
#  NOTE(review): this selection is positional, so it depends on how many columns covs_some
#  holds and on the order in which columns were appended to covs above -- presumably these
#  are the first eight outcome columns; TODO confirm
covs_none=cbind(covs_none,covs[,29:36])
# append the fixed-effects id, as a factor, to every covariate set
#  the new column is passed unnamed, so cbind derives its name from the argument expression;
#  check downstream references before hoisting as.factor(fe_id_num) into a variable
covs_all=cbind(covs_all,as.factor(fe_id_num)) 
covs_some=cbind(covs_some,as.factor(fe_id_num))  
covs_none=cbind(covs_none,as.factor(fe_id_num))  
covs=cbind(covs,as.factor(fe_id_num))


# produce subsets; 
#  -- full: main results
#  -- demx, repx: results by party of incumbent
#  -- extr, modr: results for safe or at-risk incumbents

# standardise dist_prev within year: subtract the year's mean and divide by the year's
# standard deviation (note dist_prev_var holds sd(), not a variance)
dist_prev <- covs_all$dist_prev
dist_prev_avg <- array(NA, length(dist_prev))
dist_prev_var <- dist_prev_avg

un_year <- unique(rain_data$year)
for (i in seq_along(un_year)) {
	# rows belonging to this year
	inds <- which(rain_data$year == un_year[i])
	dist_prev_avg[inds] <- mean(dist_prev[inds], na.rm = TRUE)
	dist_prev_var[inds] <- sd(dist_prev[inds], na.rm = TRUE)
}

dprv <- (dist_prev - dist_prev_avg) / dist_prev_var

# safe v. competitive :: based on presidential vote...

# difDpres looked up at each row's prior_obs index; rows whose absolute value
# exceeds .075 are flagged extreme
d_prv <- rain_data$difDpres[rain_data$prior_obs]
extreme <- abs(d_prv) > .075

# full sample mask: year after 1954, lower flag set, and r_inc differs from d_inc
full <- (
	rain_data$year > 1954 &
	rain_data$lower &
	rain_data$r_inc != rain_data$d_inc
)
      
# addendum :: additional measure for # terms in office => final seniority results      
# 	- will be easier to produce <terms> then stratify on terms in full
#  also add :: safeness x party slider ...

# terms :: running count of observations per icpsr id, offset by earlier congresses in the nominate file
#  For each icpsr id, base counts the nominate rows for that id (column 2) whose congress
#  (column 1) is smaller than the congress of the id's first row in rain_data; the j-th
#  row of that id in rain_data then gets terms = j + base.
#  (assumes rain_data rows are in chronological order within icpsr -- TODO confirm)
noms=read.csv('nominate/hou_nom_icpsr.csv',stringsAsFactors=FALSE)
icpsr=rain_data$icpsr
year=rain_data$year
cong=rain_data$cong

un_icpsr=unique(icpsr[which(!is.na(icpsr))])
# one slot per row of rain_data: terms is indexed by row, not by unique icpsr
# (it was previously sized by length(un_icpsr) and only grew as rows were assigned);
# rows with a missing icpsr stay NA
terms=array(NA,length(icpsr))
sns=c()

base=array(NA,length(un_icpsr))
for(i in seq_along(un_icpsr)){
	ix=which(icpsr==un_icpsr[i])
	iy=which(noms[,2]==un_icpsr[i])
	# nominate entries for this id that precede its first congress in rain_data
	base[i]=length(which(cong[ix[1]]>noms[iy,1]))

	# number this id's rows 1..n in one assignment, shifted by base
	terms[ix]=seq_along(ix)+base[i]
}
   	    
# row-index subsets (all restricted to year after 1954 and the lower flag)
#  demx / repx: rows with d_inc == 1 / r_inc == 1
demx <- which(rain_data$year > 1954 & rain_data$lower & rain_data$d_inc == 1)
repx <- which(rain_data$year > 1954 & rain_data$lower & rain_data$r_inc == 1)

#  modr / extr: rows where the extreme flag is FALSE / TRUE (NA rows are dropped by which())
modr <- which(rain_data$year > 1954 & rain_data$lower == 1 & !extreme)
extr <- which(rain_data$year > 1954 & rain_data$lower == 1 & extreme)

#  full: convert the logical mask built earlier into row indices
full <- which(full)
	                 
# END >>>> next files         